* Merge in targeting index and run regressions
* 3/4/2025

clear all
set more off
set seed 1515

global index_vars sex erace eorigin edcat income_bucket age_bin
global income_bins = 5

program main
	read_sipp_waves
	append_sipp_panel
	generate_sipp_predictions
	FI_summ_stats
	targeting_regressions
end

program read_sipp_waves
	// download SIPP from https://www.census.gov/programs-surveys/sipp/data/datasets.html
	// originally downloaded May 15-June 3, 2024
	di in red "starting read_sipp_waves"
	foreach filename in 2014w1 2014w2 2014w3 2014w4 2018 {
		if "`filename'" == "2014w3" {
			local file_suffix = "2014w3_13"
		}
		else {
			local file_suffix = "`filename'"
		}
		use ssuid spanel wpfinwgt swave monthcode shhadid tst_intv ///
		pnum tage ems rfpersons rfamref  ///
		esex eorigin eeduc erace efood* ///
		edisany eorigin ecitizen ///
		thtotinc tftotinc using "$raw//sipp/pu`file_suffix'.dta", clear
		
		save "$temp/sipp`filename'", replace
	}
	foreach filename in 2019 2020 {
		use SSUID SPANEL WPFINWGT SWAVE MONTHCODE SHHADID TST_INTV ///
		PNUM TAGE EMS RFPERSONS RFAMREF  ///
		ESEX EORIGIN EEDUC ERACE EFOOD* ///
		EDISANY EORIGIN ECITIZEN ///
		THTOTINC TFTOTINC using "$raw/sipp/pu`filename'.dta", clear
		ren _all, lower
		
		save "$temp/sipp`filename'", replace
	}
end

program append_sipp_panel
	clear
	forval i = 1/4 {
		append using "$temp/sipp2014w`i'"
		cap erase "$temp/sipp2014w`i'.dta"
	}
	forval year = 2018/2020 {
		append using "$temp/sipp`year'"
		cap erase "$temp/sipp`year'.dta"
	}
	save "$temp/sipp2014_20_panel", replace
end

program generate_sipp_predictions
	use "$temp/sipp2014_20_panel", clear
	assert !mi(tage)
	keep if tage >= 60  // restrict to ages in the ACS sample
	ren esex sex
	
	* generate education bins
	drop if eeduc == .
	assert inrange(eeduc, 31, 46)
	gen edcat = 1 if eeduc < 39
	replace edcat = 2 if eeduc == 39
	replace edcat = 3 if eeduc == 40 | eeduc == 41
	replace edcat = 4 if eeduc > 41
	
	* generate income buckets
	assert !mi(thtotinc)
	gen hhincome_year = thtotinc * 12  // convert to annual income
	sort hhincome_year swave spanel ssuid monthcode pnum // sort by income plus unique ID for stable sorting
	gen rank = _n / (_N+1)
	gen income_bucket = ceil(rank * ${income_bins})
	assert inrange(income_bucket, 1, ${income_bins})
	cap drop rank
	
	* generate age bins
	gen age_bin = 1 if tage < 70
	replace age_bin = 2 if tage >= 70 & tage < 80
	replace age_bin = 3 if tage >= 80
	
    gen food_insecure = (efood2 == 1 | efood3 == 1 | efood5 == 1)
	
	gisid ssuid pnum swave monthcode
	tostring pnum, gen(pnum_s)
	gen person_id = ssuid+pnum_s  // person identifier for clustering SEs

	save "$intermediate/sipp_for_FI_predictions", replace
	
    reg food_insecure i.sex i.erace i.eorigin i.edcat ///
		i.income_bucket i.age_bin [w = wpfinwgt], vce(cl person_id)
	predict pr_food_insecure, xb
	
	keep $index_vars pr_food_insecure
	duplicates drop
	
	sort $index_vars
	save "$for_analysis/predicted_food_insecurity", replace
	cap erase "$temp/sipp2014_20_panel.dta"
end

program FI_summ_stats
	use "$intermediate/sipp_for_FI_predictions", clear
	
	lab var sex "Sex"
	lab var erace "Race"
	lab var eorigin "Ethnicity"
	
	replace hhincome_year = hhincome_year / 10000 // income in ten thousands

	* run a series of LPMs
	reg food_insecure i.sex [w = wpfinwgt], vce(cl person_id)
	est sto sex
	reg food_insecure i.erace ib2.eorigin [w = wpfinwgt], vce(cl person_id)
	est sto race
	reg food_insecure i.edcat [w = wpfinwgt], vce(cl person_id)
	est sto educ
	reg food_insecure hhincome_year [w = wpfinwgt], vce(cl person_id)
	est sto income
	reg food_insecure i.age_bin [w = wpfinwgt], vce(cl person_id)
	est sto age
	reg food_insecure i.sex i.erace ib2.eorigin ///
		i.edcat hhincome_year i.age_bin [w = wpfinwgt], vce(cl person_id)
	est sto all

	* generate table
	esttab sex race educ income age all using ///
		"$output/FI_index_lpm.tex", replace ///
		drop(1.sex 1.erace 2.eorigin 1.edcat 1.age_bin _cons) nomtitles se(%5.3f) ///
		varlabels(2.sex "Female" 2.erace "Black" 3.erace "Asian" 4.erace "Other Race" ///
		1.eorigin "Hispanic" 2.edcat "HS Grad" 3.edcat "Some College" 4.edcat "College Grad" ///
		hhincome_year "Income (\$10,000s)" 2.age_bin "Age 70-79" 3.age_bin "Age 80+") ///
		nonote nocons booktabs stats(N, labels("N") fmt("%9.0fc"))
end

program targeting_regressions
	use "$for_analysis/state_for_regressions_actual_ssi", clear
	
	* generate variables to match the SIPP predictions dataset
	gen erace = 1 if race == 1
	replace erace = 2 if race == 2
	replace erace = 3 if race == 4 | race == 5 | race == 6
	replace erace = 4 if erace == .
	
	sort hhincome serial pernum sample  // sort by income (+household ID for stable sorting)
	gen rank = _n / (_N+1)
	gen income_bucket = ceil(rank* ${income_bins})
	assert inrange(income_bucket, 1, ${income_bins})
	cap drop rank
	
	gen age_bin = 1 if age < 70
	replace age_bin = 2 if age >= 70 & age < 80
	replace age_bin = 3 if age >= 80
	gen eorigin = (hispan == 0)
	
	merge m:1 $index_vars using "$for_analysis/predicted_food_insecurity.dta", ///
		assert(1 2 3) keep(3) nogen
	qui cou 
	di "sample size = `r(N)'"
	
	su pr_food_insecure, d
	local median_insecure = `r(p50)'
	
	reghdfe snap inter [w = perwt] if pr_food_insecure >= `median_insecure', ///
		a(i.statefip i.year) vce(cl statefip)
	est sto high_insecurity
	reghdfe snap inter [w = perwt] if pr_food_insecure < `median_insecure', ///
		a(i.statefip i.year) vce(cl statefip)
	est sto low_insecurity
	
	coefplot (high_insecurity, mlabels(inter = 1 "High Prb. Food Insecure")) ///
		(low_insecurity, mlabels(inter = 1 "Low Prb. Food Insecure")), ///
		keep(inter) xline(0, lp(dash)) msymbol(d) graphregion(fcolor(white) lcolor(white) ///
		lwidth(vvvthin)) ylab(none) legend(off) xlab(#12, grid gstyle(dot)) ///
		ciopts(recast(rcap)) xtitle("Coefficient and 95% CI") xscale(titlegap(2)) yscale(off)
	graph export "$output/above_below_median_coefs.png", replace	
		
	su pr_food_insecure, d
	local p25 = `r(p25)'
	local p50 = `r(p50)'
	local p75 = `r(p75)'
	
	reghdfe snap inter [w = perwt] if pr_food_insecure >= `p75', ///
		a(i.statefip i.year) vce(cl statefip)
	est sto highest_insecurity
	reghdfe snap inter [w = perwt] if pr_food_insecure >= `p50' & pr_food_insecure < `p75', ///
		a(i.statefip i.year) vce(cl statefip)
	est sto higher_insecurity
	reghdfe snap inter [w = perwt] if pr_food_insecure >= `p25' & pr_food_insecure < `p50', ///
		a(i.statefip i.year) vce(cl statefip)
	est sto med_insecurity
	reghdfe snap inter [w = perwt] if pr_food_insecure < `p25', ///
		a(i.statefip i.year) vce(cl statefip)
	est sto low_insecurity
	
	coefplot (highest_insecurity, mlabels(inter = 1 "Above 75th")) ///
		(higher_insecurity, mlabels(inter = 1 "50th-74th")) ///
		(med_insecurity, mlabels(inter = 1 "25th-49th")) ///
		(low_insecurity, mlabels(inter = 1 "Below 25th")), ///
		keep(inter) xline(0, lp(dash)) msymbol(d) graphregion(fcolor(white) lcolor(white) ///
		lwidth(vvvthin)) ylab(none) legend(off) xlab(#12, grid gstyle(dot)) ///
		ciopts(recast(rcap)) xtitle("Coefficient and 95% CI") xscale(titlegap(2)) yscale(off)
	graph export "$output/quartile_coefs.png", replace
end

* Execute
main
